package fetcher

import (
	"bufio"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"golang.org/x/net/html/charset"
	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"
	"log"
	"math/rand"
	"net/http"
	"time"
)

// Fetch issues an HTTP GET for url with a randomly chosen User-Agent header,
// converts the response body to UTF-8 using the encoding reported by
// determineEncoding, and parses the result into a goquery document.
//
// A non-nil error is returned when the request cannot be built or sent, when
// the server answers with a status other than 200 OK, or when the body cannot
// be parsed. The returned document is nil whenever the error is non-nil.
func Fetch(url string) (*goquery.Document, error) {
	request, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("building request for %q: %w", url, err)
	}
	request.Header.Add("User-Agent", getAgent())

	client := http.Client{
		// Returning nil accepts every redirect.
		// NOTE(review): this replaces the client's default redirect policy,
		// so no redirect limit applies here, and no Timeout is set either;
		// confirm that both are intended.
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return nil
		},
	}

	resp, err := client.Do(request)
	if err != nil {
		// resp must not be touched on this path; report the transport error.
		return nil, fmt.Errorf("fetching %q: %w", url, err)
	}
	// Register the close only after the error check, once resp is known to
	// be usable.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Wrong status code %d", resp.StatusCode)
	}

	// The buffered reader lets determineEncoding peek at the start of the
	// body without consuming it, so the parser below still sees every byte.
	bodyReader := bufio.NewReader(resp.Body)
	e := determineEncoding(bodyReader)
	utf8Reader := transform.NewReader(bodyReader, e.NewDecoder())

	doc, err := goquery.NewDocumentFromReader(utf8Reader)
	if err != nil {
		return nil, fmt.Errorf("parsing document from %q: %w", url, err)
	}
	return doc, nil
}


 // getAgent returns a browser User-Agent string picked at random from a fixed
 // list, so that requests do not always present the same client identity.
func getAgent() string {
	// Every entry is a bare header value; it must not contain the
	// "User-Agent" header name itself.
	agents := [...]string{
		"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0",
		"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
		"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
		"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
		"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
		"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
		"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
		"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
		"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
	}

	// Seed from the current time so the pick varies from call to call.
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	return agents[r.Intn(len(agents))]
}

  // determineEncoding guesses the character encoding of the page readable
  // from r by inspecting up to its first 1024 bytes. The bytes are peeked,
  // not consumed, so r can still be read from the beginning afterwards.
  //
  // When nothing at all can be read, the problem is logged and UTF-8 is
  // returned as the fallback.
func determineEncoding(r *bufio.Reader) encoding.Encoding {
	head, err := r.Peek(1024)
	// Peek reports an error (io.EOF for a body shorter than 1024 bytes)
	// whenever it cannot supply the full amount, yet it still returns the
	// bytes it did read. Only fall back when there is nothing to inspect;
	// otherwise short pages would never have their encoding detected.
	if err != nil && len(head) == 0 {
		log.Printf("Fetch error: %v", err)
		return unicode.UTF8
	}

	// No Content-Type hint is available here, hence the empty string.
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}